In [1]:
# List the contents of the current working directory
# (presumably to check that PixarMovies.csv is present — confirm).
%ls
In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
In [3]:
# Read the Pixar dataset from the CSV file in the working directory.
pixar_movies = pd.read_csv(filepath_or_buffer='PixarMovies.csv')
In [4]:
# Dimensions of the loaded frame as (rows, columns).
pixar_movies.shape
Out[4]:
In [5]:
# Display the frame as loaded, for a first look at its columns and values.
pixar_movies
Out[5]:
In [6]:
# Per-column dtypes, to see which columns were not parsed as numeric.
pixar_movies.dtypes
Out[6]:
In [7]:
# Summary statistics computed only over rows that have no missing values.
pixar_movies.dropna().describe()
Out[7]:
Because the values in the `Domestic %` and `International %` columns end with a `%` sign, pandas reads both columns as `object` (string) dtype. They need to be converted to `float` before they can be used numerically.
In [8]:
# Strip the trailing '%' from 'Domestic %' and convert the column to float.
# Guarded on dtype so the cell is idempotent: once the column is numeric the
# `.str` accessor would raise AttributeError if the cell were run again.
if not pd.api.types.is_numeric_dtype(pixar_movies['Domestic %']):
    pixar_movies['Domestic %'] = (
        pixar_movies['Domestic %'].str.rstrip('%').astype('float')
    )
In [9]:
# Strip the trailing '%' from 'International %' and convert the column to float.
# Guarded on dtype so the cell is idempotent: once the column is numeric the
# `.str` accessor would raise AttributeError if the cell were run again.
if not pd.api.types.is_numeric_dtype(pixar_movies['International %']):
    pixar_movies['International %'] = (
        pixar_movies['International %'].str.rstrip('%').astype('float')
    )
For the score columns, `RT Score` and `Metacritic Score` are on a 100-point scale, but `IMDB Score` is on a 10-point scale. To make the three scores directly comparable, `IMDB Score` is rescaled to a 100-point scale by multiplying it by 10.
In [10]:
# Rescale 'IMDB Score' from a 10-point to a 100-point scale.
# Guarded so the cell is idempotent: the multiplication is applied only while
# the column still fits the 10-point scale, so re-running the cell does not
# multiply the scores by 10 a second time. (If every value is NaN the
# comparison is False and the column is left untouched.)
if pixar_movies['IMDB Score'].max() <= 10:
    pixar_movies['IMDB Score'] = pixar_movies['IMDB Score'] * 10
In [11]:
# Keep only the rows in which no column is missing; the result is a new
# frame, so `pixar_movies` itself is left unchanged.
filtered_pixar = pixar_movies.dropna(axis=0, how='any')
In [12]:
# Use the 'Movie' column as the row index of the full frame.
# Guarded so that re-running the cell is a no-op rather than a KeyError:
# after the first run 'Movie' is the index and no longer a column.
if 'Movie' in pixar_movies.columns:
    pixar_movies.set_index('Movie', inplace=True)
In [13]:
# Use the 'Movie' column as the row index of the filtered frame.
# Guarded so that re-running the cell is a no-op rather than a KeyError:
# after the first run 'Movie' is the index and no longer a column.
if 'Movie' in filtered_pixar.columns:
    filtered_pixar.set_index('Movie', inplace=True)
In [14]:
# Display the frame again to inspect the result of the cleaning cells
# (percentage conversion, IMDB rescale, 'Movie' index).
pixar_movies
Out[14]:
In [15]:
# Select the three critic-score columns from the full frame.
critics_reviews = pixar_movies.loc[
    :, ['RT Score', 'IMDB Score', 'Metacritic Score']
]
In [16]:
# Line plot with one line per critic-score column, across the rows of
# `critics_reviews`. A title and axis labels are set so the figure can be
# read on its own when the notebook is skimmed.
ax = critics_reviews.plot(figsize=(10, 6))
ax.set(title='Critic scores by movie', xlabel='Movie', ylabel='Score')
plt.show()
In [17]:
# Box plot with one box per critic-score column, to compare the spread of
# the three scores. A title and y-axis label are set so the figure can be
# read on its own.
ax = critics_reviews.plot(kind='box', figsize=(9, 5))
ax.set(title='Distribution of critic scores', ylabel='Score')
plt.show()
In [18]:
# Select the two revenue-share columns from the rows without missing values.
revenue_proportions = filtered_pixar.loc[
    :, ['Domestic %', 'International %']
]
In [19]:
# Stacked bar chart: one bar per row of `revenue_proportions`, split into the
# 'Domestic %' and 'International %' shares. A title and axis labels are set
# so the figure can be read on its own.
ax = revenue_proportions.plot(kind='bar', stacked=True, figsize=(12, 6))
ax.set(
    title='Domestic vs. international share of revenue',
    xlabel='Movie',
    ylabel='Share of revenue (%)',
)
plt.show()
In [20]:
# Grouped bar chart of 'Oscars Nominated' and 'Oscars Won' for each row of
# `filtered_pixar`. A title and axis labels are set so the figure can be
# read on its own.
ax = filtered_pixar[['Oscars Nominated', 'Oscars Won']].plot(
    kind='bar', figsize=(12, 6)
)
ax.set(
    title='Oscar nominations and wins by movie',
    xlabel='Movie',
    ylabel='Count',
)
plt.show()
In [ ]: